MAPEO de COVID-19¶
Base de datos¶
import pandas as pd
df = pd.read_csv("C:/Users/Michael Encalada/Documents/GitHub/Magallanes/positivos_covid (1).csv", delimiter=';')
print(df.head())
FECHA_CORTE DEPARTAMENTO PROVINCIA DISTRITO METODODX EDAD SEXO \ 0 20241203 TUMBES TUMBES TUMBES AG 46.0 FEMENINO 1 20241203 LIMA LIMA JESUS MARIA AG 69.0 FEMENINO 2 20241203 SAN MARTIN MOYOBAMBA MOYOBAMBA AG 55.0 FEMENINO 3 20241203 AREQUIPA CAYLLOMA COPORAQUE AG 50.0 MASCULINO 4 20241203 LIMA LIMA JESUS MARIA AG 58.0 MASCULINO FECHA_RESULTADO UBIGEO id_persona 0 20221207.0 240101.0 203499.0 1 20230822.0 150113.0 221397.0 2 20240108.0 220101.0 295651.0 3 20230824.0 40506.0 851625.0 4 20221217.0 150113.0 287786.0
df=df[df.EDAD>50]
df.METODODX.value_counts()
AG 599842 PCR 400134 PR 293382 Name: METODODX, dtype: int64
# NUEVA VARIABLE "AÑO"
df['FECHA_RESULTADO'] = pd.to_datetime(df['FECHA_RESULTADO'], format='%Y%m%d', errors='coerce')
df['AÑO'] = df['FECHA_RESULTADO'].dt.year
df.AÑO.head()
1 2023.0 2 2024.0 4 2022.0 7 2023.0 9 2020.0 Name: AÑO, dtype: float64
df['AÑO'] = df['AÑO'].astype('Int64') #a numeros enteros
print(df[['AÑO']].head())
AÑO 1 2023 2 2024 4 2022 7 2023 9 2020
Agreggate¶
# AG: Antigenos
# PCR: Prueba molecular
# PR: Prueba rápida serológica
df = df[df['AÑO'] != 1899]
df = df[df['PROVINCIA'] != 'EN INVESTIGACIÓN']
indexList=['AÑO','DEPARTAMENTO','PROVINCIA','METODODX']
aggregator={'METODODX':[len]}
prov=df.groupby(indexList,observed=True).agg(aggregator)
prov
| METODODX | ||||
|---|---|---|---|---|
| len | ||||
| AÑO | DEPARTAMENTO | PROVINCIA | METODODX | |
| 2020 | AMAZONAS | BAGUA | PCR | 203 |
| PR | 2368 | |||
| BONGARA | PCR | 20 | ||
| PR | 92 | |||
| CHACHAPOYAS | PCR | 90 | ||
| ... | ... | ... | ... | ... |
| 2024 | TUMBES | TUMBES | PCR | 15 |
| ZARUMILLA | PCR | 6 | ||
| UCAYALI | CORONEL PORTILLO | AG | 16 | |
| PCR | 17 | |||
| PADRE ABAD | AG | 1 |
1886 rows × 1 columns
#wide
Draft=prov.unstack(3).fillna(0) #leftmost index in rows
Draft
| METODODX | |||||
|---|---|---|---|---|---|
| len | |||||
| METODODX | AG | PCR | PR | ||
| AÑO | DEPARTAMENTO | PROVINCIA | |||
| 2020 | AMAZONAS | BAGUA | 0.0 | 203.0 | 2368.0 |
| BONGARA | 0.0 | 20.0 | 92.0 | ||
| CHACHAPOYAS | 0.0 | 90.0 | 419.0 | ||
| CONDORCANQUI | 0.0 | 14.0 | 540.0 | ||
| LUYA | 0.0 | 1.0 | 89.0 | ||
| ... | ... | ... | ... | ... | ... |
| 2024 | TUMBES | CONTRALMIRANTE VILLAR | 0.0 | 4.0 | 0.0 |
| TUMBES | 11.0 | 15.0 | 0.0 | ||
| ZARUMILLA | 0.0 | 6.0 | 0.0 | ||
| UCAYALI | CORONEL PORTILLO | 16.0 | 17.0 | 0.0 | |
| PADRE ABAD | 1.0 | 0.0 | 0.0 | ||
920 rows × 3 columns
# % de personas con prueba AG
Draft['AG_pct']=Draft.iloc[:,1]/(Draft.iloc[:,0] + Draft.iloc[:,1])
prov_Ag=Draft['AG_pct'].unstack('AÑO').fillna(0)
prov_Ag
| AÑO | 2020 | 2021 | 2022 | 2023 | 2024 | |
|---|---|---|---|---|---|---|
| DEPARTAMENTO | PROVINCIA | |||||
| AMAZONAS | BAGUA | 1.000000 | 0.144654 | 0.233813 | 0.240000 | 0.470588 |
| BONGARA | 1.000000 | 0.291971 | 0.466019 | 0.454545 | 0.928571 | |
| CHACHAPOYAS | 1.000000 | 0.448919 | 0.414784 | 0.219178 | 0.322581 | |
| CONDORCANQUI | 1.000000 | 0.033333 | 0.025000 | 0.000000 | 0.000000 | |
| LUYA | 1.000000 | 0.036585 | 0.059361 | 0.111111 | 0.750000 | |
| ... | ... | ... | ... | ... | ... | ... |
| TUMBES | ZARUMILLA | 0.923077 | 0.315556 | 0.492958 | 0.588235 | 1.000000 |
| UCAYALI | ATALAYA | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| CORONEL PORTILLO | 0.996875 | 0.186831 | 0.503120 | 0.545455 | 0.515152 | |
| PADRE ABAD | 0.000000 | 0.078512 | 0.193694 | 0.000000 | 0.000000 | |
| PURUS | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 |
196 rows × 5 columns
#data type
prov_Ag.columns #esta como numero los años
Index([2020, 2021, 2022, 2023, 2024], dtype='Int64', name='AÑO')
prov_Ag.columns=['year'+str(x) for x in prov_Ag.columns]
prov_Ag
| year2020 | year2021 | year2022 | year2023 | year2024 | ||
|---|---|---|---|---|---|---|
| DEPARTAMENTO | PROVINCIA | |||||
| AMAZONAS | BAGUA | 1.000000 | 0.144654 | 0.233813 | 0.240000 | 0.470588 |
| BONGARA | 1.000000 | 0.291971 | 0.466019 | 0.454545 | 0.928571 | |
| CHACHAPOYAS | 1.000000 | 0.448919 | 0.414784 | 0.219178 | 0.322581 | |
| CONDORCANQUI | 1.000000 | 0.033333 | 0.025000 | 0.000000 | 0.000000 | |
| LUYA | 1.000000 | 0.036585 | 0.059361 | 0.111111 | 0.750000 | |
| ... | ... | ... | ... | ... | ... | ... |
| TUMBES | ZARUMILLA | 0.923077 | 0.315556 | 0.492958 | 0.588235 | 1.000000 |
| UCAYALI | ATALAYA | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| CORONEL PORTILLO | 0.996875 | 0.186831 | 0.503120 | 0.545455 | 0.515152 | |
| PADRE ABAD | 0.000000 | 0.078512 | 0.193694 | 0.000000 | 0.000000 | |
| PURUS | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 |
196 rows × 5 columns
prov_Ag.reset_index(inplace=True)
prov_Ag
| DEPARTAMENTO | PROVINCIA | year2020 | year2021 | year2022 | year2023 | year2024 | |
|---|---|---|---|---|---|---|---|
| 0 | AMAZONAS | BAGUA | 1.000000 | 0.144654 | 0.233813 | 0.240000 | 0.470588 |
| 1 | AMAZONAS | BONGARA | 1.000000 | 0.291971 | 0.466019 | 0.454545 | 0.928571 |
| 2 | AMAZONAS | CHACHAPOYAS | 1.000000 | 0.448919 | 0.414784 | 0.219178 | 0.322581 |
| 3 | AMAZONAS | CONDORCANQUI | 1.000000 | 0.033333 | 0.025000 | 0.000000 | 0.000000 |
| 4 | AMAZONAS | LUYA | 1.000000 | 0.036585 | 0.059361 | 0.111111 | 0.750000 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 191 | TUMBES | ZARUMILLA | 0.923077 | 0.315556 | 0.492958 | 0.588235 | 1.000000 |
| 192 | UCAYALI | ATALAYA | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 193 | UCAYALI | CORONEL PORTILLO | 0.996875 | 0.186831 | 0.503120 | 0.545455 | 0.515152 |
| 194 | UCAYALI | PADRE ABAD | 0.000000 | 0.078512 | 0.193694 | 0.000000 | 0.000000 |
| 195 | UCAYALI | PURUS | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 |
196 rows × 7 columns
MAPA¶
mapLink='https://github.com/SocialAnalytics-StrategicIntelligence/GeoDF_Analytics/raw/main/maps/ProvsINEI2023.zip'
#panadas vienen como zip hy, r no puede abrir por si acaso
import geopandas as gpd
provmap=gpd.read_file(mapLink)
provmap.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 196 entries, 0 to 195 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 OBJECTID 196 non-null float64 1 CCDD 196 non-null object 2 CCPP 196 non-null object 3 DEPARTAMEN 196 non-null object 4 PROVINCIA 196 non-null object 5 geometry 196 non-null geometry dtypes: float64(1), geometry(1), object(4) memory usage: 9.3+ KB
provmap['location']=['+'.join(x[0]) for x in zip(provmap.iloc[:,3:5].values)]
provmap.head(10)
| OBJECTID | CCDD | CCPP | DEPARTAMEN | PROVINCIA | geometry | location | |
|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 01 | 01 | AMAZONAS | CHACHAPOYAS | POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... | AMAZONAS+CHACHAPOYAS |
| 1 | 2.0 | 01 | 02 | AMAZONAS | BAGUA | POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... | AMAZONAS+BAGUA |
| 2 | 3.0 | 01 | 03 | AMAZONAS | BONGARA | POLYGON ((-77.72759 -5.14030, -77.72361 -5.140... | AMAZONAS+BONGARA |
| 3 | 4.0 | 01 | 04 | AMAZONAS | CONDORCANQUI | POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... | AMAZONAS+CONDORCANQUI |
| 4 | 5.0 | 01 | 05 | AMAZONAS | LUYA | POLYGON ((-78.13023 -5.90370, -78.13011 -5.904... | AMAZONAS+LUYA |
| 5 | 6.0 | 01 | 06 | AMAZONAS | RODRIGUEZ DE MENDOZA | POLYGON ((-77.44452 -6.05002, -77.44387 -6.050... | AMAZONAS+RODRIGUEZ DE MENDOZA |
| 6 | 7.0 | 01 | 07 | AMAZONAS | UTCUBAMBA | POLYGON ((-78.09288 -5.36258, -78.09288 -5.364... | AMAZONAS+UTCUBAMBA |
| 7 | 8.0 | 02 | 01 | ANCASH | HUARAZ | POLYGON ((-77.39870 -9.35563, -77.39852 -9.356... | ANCASH+HUARAZ |
| 8 | 9.0 | 02 | 02 | ANCASH | AIJA | POLYGON ((-77.61368 -9.64900, -77.61241 -9.649... | ANCASH+AIJA |
| 9 | 10.0 | 02 | 03 | ANCASH | ANTONIO RAYMONDI | POLYGON ((-77.08856 -8.97496, -77.08804 -8.975... | ANCASH+ANTONIO RAYMONDI |
print(prov_Ag.columns)
Index(['DEPARTAMENTO', 'PROVINCIA', 'year2020', 'year2021', 'year2022',
'year2023', 'year2024'],
dtype='object')
# Unir las columnas 'departamento' y 'provincia' en una nueva columna 'location'
prov_Ag['location'] = prov_Ag.apply(lambda row: f"{row['DEPARTAMENTO']} + {row['PROVINCIA']}", axis=1)
# Ver las primeras filas para confirmar
prov_Ag.head()
| DEPARTAMENTO | PROVINCIA | year2020 | year2021 | year2022 | year2023 | year2024 | location | |
|---|---|---|---|---|---|---|---|---|
| 0 | AMAZONAS | BAGUA | 1.0 | 0.144654 | 0.233813 | 0.240000 | 0.470588 | AMAZONAS + BAGUA |
| 1 | AMAZONAS | BONGARA | 1.0 | 0.291971 | 0.466019 | 0.454545 | 0.928571 | AMAZONAS + BONGARA |
| 2 | AMAZONAS | CHACHAPOYAS | 1.0 | 0.448919 | 0.414784 | 0.219178 | 0.322581 | AMAZONAS + CHACHAPOYAS |
| 3 | AMAZONAS | CONDORCANQUI | 1.0 | 0.033333 | 0.025000 | 0.000000 | 0.000000 | AMAZONAS + CONDORCANQUI |
| 4 | AMAZONAS | LUYA | 1.0 | 0.036585 | 0.059361 | 0.111111 | 0.750000 | AMAZONAS + LUYA |
## Limpieza
#!pip install unidecode
import unidecode
byePunctuation=lambda x: unidecode.unidecode(x)
prov_Ag['location']=prov_Ag['location'].apply(byePunctuation)
provmap['location']=provmap['location'].apply(byePunctuation)
prov_Ag['location']=prov_Ag.location.str.replace("\-|\_|\s+","",regex=True)
provmap['location']=provmap.location.str.replace("\-|\_|\s+","",regex=True)
nomatch_df=set(prov_Ag.location)- set(provmap.location)
nomatch_gdf=set(provmap.location)-set(prov_Ag.location)
len(nomatch_df), len(nomatch_gdf) #!!!
(2, 2)
#!pip install thefuzz
# pick the closest match from nomatch_gdf for a value in nomatch_df
from thefuzz import process
[(dis,process.extractOne(dis,nomatch_gdf)) for dis in sorted(nomatch_df)]
[('ANCASH+ANTONIORAIMONDI', ('ANCASH+ANTONIORAYMONDI', 95)),
('ICA+NAZCA', ('ICA+NASCA', 89))]
# Diccionario de cambios
{dis:process.extractOne(dis,nomatch_gdf)[0] for dis in sorted(nomatch_df)}
{'ANCASH+ANTONIORAIMONDI': 'ANCASH+ANTONIORAYMONDI', 'ICA+NAZCA': 'ICA+NASCA'}
# then:
changesinDF={dis:process.extractOne(dis,nomatch_gdf)[0] for dis in sorted(nomatch_df)}
prov_Ag.replace({'location': changesinDF}, inplace=True)
# comprobamos
nomatch_df=set(prov_Ag.location)- set(provmap.location)
nomatch_gdf=set(provmap.location)-set(prov_Ag.location)
[(dis,process.extractOne(dis,nomatch_gdf)) for dis in sorted(nomatch_df)]
[]
#merge
prov_Ag_map=provmap.merge(prov_Ag, on='location',how='left',indicator='flag')
prov_Ag_map.info()
<class 'geopandas.geodataframe.GeoDataFrame'> Int64Index: 196 entries, 0 to 195 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 OBJECTID 196 non-null float64 1 CCDD 196 non-null object 2 CCPP 196 non-null object 3 DEPARTAMEN 196 non-null object 4 PROVINCIA_x 196 non-null object 5 geometry 196 non-null geometry 6 location 196 non-null object 7 DEPARTAMENTO 196 non-null object 8 PROVINCIA_y 196 non-null object 9 year2020 196 non-null float64 10 year2021 196 non-null float64 11 year2022 196 non-null float64 12 year2023 196 non-null float64 13 year2024 196 non-null float64 14 flag 196 non-null category dtypes: category(1), float64(6), geometry(1), object(7) memory usage: 23.3+ KB
prov_Ag_map['flag']=prov_Ag_map.flag.astype(str)
#
bye=['DEPARTAMEN', 'PROVINCIA_x', 'CCPP','CCDD']
prov_Ag_map.drop(columns=bye,inplace=True)
prov_Ag_map.head()
| OBJECTID | geometry | location | DEPARTAMENTO | PROVINCIA_y | year2020 | year2021 | year2022 | year2023 | year2024 | flag | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... | AMAZONAS+CHACHAPOYAS | AMAZONAS | CHACHAPOYAS | 1.0 | 0.448919 | 0.414784 | 0.219178 | 0.322581 | both |
| 1 | 2.0 | POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... | AMAZONAS+BAGUA | AMAZONAS | BAGUA | 1.0 | 0.144654 | 0.233813 | 0.240000 | 0.470588 | both |
| 2 | 3.0 | POLYGON ((-77.72759 -5.14030, -77.72361 -5.140... | AMAZONAS+BONGARA | AMAZONAS | BONGARA | 1.0 | 0.291971 | 0.466019 | 0.454545 | 0.928571 | both |
| 3 | 4.0 | POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... | AMAZONAS+CONDORCANQUI | AMAZONAS | CONDORCANQUI | 1.0 | 0.033333 | 0.025000 | 0.000000 | 0.000000 | both |
| 4 | 5.0 | POLYGON ((-78.13023 -5.90370, -78.13011 -5.904... | AMAZONAS+LUYA | AMAZONAS | LUYA | 1.0 | 0.036585 | 0.059361 | 0.111111 | 0.750000 | both |
# lidiar con los CEROS
prov_Ag_map.fillna(0,inplace=True)
Guardar el geoDF
#crear carpeta maps, donde guardar
import os
prov_Ag_map.to_file(os.path.join('C:/Users/Michael Encalada/Documents/GitHub/Magallanes/week10_spatial/maps/',"provinciasPeru.gpkg"), layer='provinciasDengue', driver="GPKG")
Explorar variables¶
# statistics
prov_Ag_map.year2022.describe()
count 196.000000 mean 0.125021 std 0.161102 min 0.000000 25% 0.016327 50% 0.068471 75% 0.170919 max 1.000000 Name: year2022, dtype: float64
#! pip install seaborn
# grafico
import seaborn as sea
sea.boxplot(prov_Ag_map.year2022, color='yellow',orient='h')
<Axes: xlabel='year2022'>
Imterpretación: El 74% de las provincias no llegan al 40% de realización de pruebas antigenas para detectar covid 19.
#grafica de trasnformacion cuantilica?
from sklearn.preprocessing import QuantileTransformer
qt = QuantileTransformer(n_quantiles=100, random_state=0,output_distribution='normal')
qt_result=qt.fit_transform(prov_Ag_map[['year2022']])
sea.boxplot(qt_result, color='yellow',orient='h')
<Axes: >
Explicación: Transofrmador quantilico: evita que los atipicos sesguen los resultados No estas eliminando los atipicos, lo estas normalizando haciendolo parte de la distribución. En todo la politica publica, hay que pensar en lo intervinible. Necesito algo que sea operable. Lo hago viendo al grupo donde se puede trabajr. Eso es el amarillo
prov_Ag_map['year_2022_qt']= qt_result
Correlación Espacial¶
Vecindario
#!pip install pysal
#conda install -c conda-forge pysal
from libpysal.weights import Queen, Rook, KNN
# rook
w_rook = Rook.from_dataframe(prov_Ag_map,use_index=False)
# queen
w_queen = Queen.from_dataframe(prov_Ag_map,use_index=False)
# k nearest neighbors
w_knn = KNN.from_dataframe(prov_Ag_map, k=8) #quiero 8 mas cercanos a mi.
Moran's correlation¶
# needed for spatial correlation
w_queen.transform = 'R'
pd.DataFrame(*w_queen.full()).sum(axis=1) # 1 means both are neighbors
0 1.0
1 1.0
2 1.0
3 1.0
4 1.0
...
191 1.0
192 1.0
193 1.0
194 1.0
195 1.0
Length: 196, dtype: float64
INDICE DE MORAN
Quiero ver si hay correlacion entre mi vecindario con dengue. Caunto dengue hay en mi barro
ojo: que a veces la frecuencia entre personas (redes) va mas alla del espacio. frecuentas con un grupo a pesar de distancias.
from esda.moran import Moran
moranCOVID = Moran(prov_Ag_map['year_2022_qt'], w_queen)
moranCOVID.I,moranCOVID.p_sim
#correlacion a 0.51 y es significativa por es 0.001 ***
(0.19136370498258676, 0.001)
interpretacion: Si sale no significativo, significa que el virus no tiene una variable espacial. no tiene efecto La cercania no tiene una relacion con el territorio. Vota en contra de keiko, porque done vivo mis vecinos tambien votan en contra de keiko
Grafico
from splot.esda import moran_scatterplot
import matplotlib.pyplot as plt
fig, ax = moran_scatterplot(moranCOVID)
ax.set_xlabel('Covid_prueba_share')
ax.set_ylabel('SpatialLag_Covid_prueba_share')
Text(0, 0.5, 'SpatialLag_Covid_prueba_share')
Interpretacion: a parte de la linea, si no tambien los cuadrantes Los que estan a la izquierda, abajo, son provincias donde el dengue esta bajo y hay cercania entre provincias bajo.
A la izquierda arriba, es dengue alto con vecinos cercanas con dengue alto. A la derecha abajo, no contagiados, casi asilados.
izquierda abajo COLD SPOT ARRIB derecha, hot spot
Otro grafico
# The scatterplot with local info
from esda.moran import Moran_Local
# calculate Moran_Local and plot
lisaCOVID = Moran_Local(y=prov_Ag_map['year_2022_qt'], w=w_knn,seed=2022)
fig, ax = moran_scatterplot(lisaCOVID,p=0.05)
ax.set_xlabel('Covid_prueba_share')
ax.set_ylabel('SpatialLag_Covid_prueba_share');
Mapa de peru
# the map with the spots and outliers
from splot.esda import lisa_cluster
f, ax = plt.subplots(1, figsize=(12, 12))
plt.title('Spots and Outliers')
fig = lisa_cluster(lisaCOVID,
prov_Ag_map,ax=ax,
legend_kwds={'loc': 'center left',
'bbox_to_anchor': (0.7, 0.6)})
Agregar data to my gdf
# quadrant
lisaCOVID.q
array([1, 1, 1, 1, 1, 1, 1, 4, 3, 3, 3, 3, 3, 3, 4, 3, 3, 4, 4, 3, 3, 3,
3, 3, 4, 3, 3, 1, 4, 1, 4, 1, 4, 1, 4, 3, 3, 3, 4, 3, 4, 3, 1, 3,
3, 1, 1, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 3, 2, 2, 1, 2, 2, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 4, 3, 3, 4, 3, 1, 1, 4, 3, 1, 1, 4, 4, 1, 1, 1, 1, 1, 2, 1, 1,
2, 1, 1, 1, 1, 2, 4, 1, 1, 3, 4, 4, 1, 4, 1, 1, 1, 1, 4, 3, 1, 1,
1, 1, 4, 3, 1, 1, 1, 1, 4, 1, 2, 1, 2, 4, 2, 4, 4, 2, 4, 1, 4, 1,
4, 1, 2, 4, 1, 1, 1, 2, 4, 3, 2, 4, 4, 4, 4, 1, 2, 3, 4, 4, 4, 1,
4, 2, 1, 1, 1, 1, 1, 1, 4, 4, 3, 3, 3, 1, 1, 1, 4, 2, 1, 3])
# significance
lisaCOVID.p_sim
array([0.165, 0.301, 0.099, 0.016, 0.051, 0.129, 0.029, 0.209, 0.087,
0.002, 0.012, 0.012, 0.013, 0.004, 0.335, 0.11 , 0.005, 0.09 ,
0.017, 0.013, 0.079, 0.13 , 0.002, 0.017, 0.132, 0.003, 0.02 ,
0.152, 0.181, 0.441, 0.058, 0.114, 0.239, 0.075, 0.404, 0.084,
0.019, 0.098, 0.181, 0.066, 0.157, 0.132, 0.461, 0.333, 0.179,
0.023, 0.332, 0.009, 0.02 , 0.008, 0.031, 0.052, 0.164, 0.135,
0.028, 0.052, 0.005, 0.121, 0.005, 0.101, 0.083, 0.142, 0.066,
0.049, 0.047, 0.025, 0.04 , 0.298, 0.22 , 0.225, 0.451, 0.232,
0.301, 0.269, 0.26 , 0.265, 0.243, 0.38 , 0.305, 0.195, 0.021,
0.039, 0.029, 0.095, 0.014, 0.469, 0.015, 0.406, 0.5 , 0.059,
0.001, 0.013, 0.077, 0.006, 0.498, 0.015, 0.073, 0.408, 0.499,
0.016, 0.37 , 0.331, 0.015, 0.036, 0.173, 0.214, 0.172, 0.217,
0.028, 0.271, 0.223, 0.095, 0.085, 0.184, 0.035, 0.014, 0.373,
0.036, 0.145, 0.028, 0.231, 0.2 , 0.066, 0.309, 0.03 , 0.067,
0.333, 0.036, 0.017, 0.091, 0.242, 0.103, 0.475, 0.016, 0.159,
0.034, 0.017, 0.307, 0.144, 0.056, 0.369, 0.264, 0.093, 0.046,
0.031, 0.358, 0.11 , 0.141, 0.039, 0.357, 0.058, 0.264, 0.056,
0.112, 0.31 , 0.15 , 0.001, 0.234, 0.159, 0.003, 0.006, 0.122,
0.03 , 0.332, 0.456, 0.012, 0.081, 0.41 , 0.369, 0.481, 0.275,
0.327, 0.152, 0.392, 0.044, 0.083, 0.034, 0.136, 0.137, 0.21 ,
0.465, 0.42 , 0.078, 0.438, 0.001, 0.036, 0.218, 0.069, 0.149,
0.009, 0.011, 0.011, 0.327, 0.488, 0.333, 0.207])
# quadrant: 1 HH, 2 LH, 3 LL, 4 HL
pd.Series(lisaCOVID.q).value_counts()
1 94 4 43 3 42 2 17 dtype: int64
Agregar a la correlación espacial
prov_Ag_map['COVID_quadrant']=[l if p <0.05 else 0 for l,p in zip(lisaCOVID.q,lisaCOVID.p_sim) ]
prov_Ag_map['COVID_quadrant'].value_counts()
0 131 1 32 3 21 4 9 2 3 Name: COVID_quadrant, dtype: int64
#recodificar
labels = [ '0 no_sig', '1 hotSpot', '2 coldOutlier', '3 coldSpot', '4 hotOutlier']
prov_Ag_map['COVID_quadrant_names']=[labels[i] for i in prov_Ag_map['COVID_quadrant']]
prov_Ag_map['COVID_quadrant_names'].value_counts()
0 no_sig 131 1 hotSpot 32 3 coldSpot 21 4 hotOutlier 9 2 coldOutlier 3 Name: COVID_quadrant_names, dtype: int64
Regraficar
from matplotlib import colors
myColMap = colors.ListedColormap([ 'ghostwhite', 'red', 'green', 'black','orange'])
f, ax = plt.subplots(1, figsize=(12,12))
plt.title('Spots and Outliers')
prov_Ag_map.plot(column='COVID_quadrant_names',
categorical=True,
cmap=myColMap,
linewidth=0.1,
edgecolor='white',
legend=True,
legend_kwds={'loc': 'center left',
'bbox_to_anchor': (0.7, 0.6)},
ax=ax)
# Remove axis
ax.set_axis_off()
# Display the map
plt.show()
prov_Ag_map.explore("COVID_quadrant_names", categorical=True,tooltip='location',cmap=myColMap)